{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "RuCLIP_onnx_example.ipynb",
      "provenance": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/Lednik7/CLIP-ONNX/blob/main/examples/RuCLIP_onnx_example.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "#@title Allowed Resources\n",
        "import multiprocessing\n",
        "import torch\n",
        "from psutil import virtual_memory\n",
        "\n",
        "ram_gb = round(virtual_memory().total / 1024**3, 1)\n",
        "\n",
        "print('CPU:', multiprocessing.cpu_count())\n",
        "print('RAM GB:', ram_gb)\n",
        "print(\"PyTorch version:\", torch.__version__)\n",
        "print(\"CUDA version:\", torch.version.cuda)\n",
        "print(\"cuDNN version:\", torch.backends.cudnn.version())\n",
        "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")\n",
        "print(\"device:\", device.type)\n",
        "\n",
        "!nvidia-smi"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "cellView": "form",
        "id": "4gfq46gnYcnU",
        "outputId": "41e2054a-e2e4-4bb5-ed39-8bd8bfc639c3"
      },
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "CPU: 2\n",
            "RAM GB: 12.7\n",
            "PyTorch version: 1.10.0+cu111\n",
            "CUDA version: 11.1\n",
            "cuDNN version: 8005\n",
            "device: cuda\n",
            "Wed Jan 19 22:10:10 2022       \n",
            "+-----------------------------------------------------------------------------+\n",
            "| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |\n",
            "|-------------------------------+----------------------+----------------------+\n",
            "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
            "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
            "|                               |                      |               MIG M. |\n",
            "|===============================+======================+======================|\n",
            "|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |\n",
            "| N/A   41C    P8     9W /  70W |      3MiB / 15109MiB |      0%      Default |\n",
            "|                               |                      |                  N/A |\n",
            "+-------------------------------+----------------------+----------------------+\n",
            "                                                                               \n",
            "+-----------------------------------------------------------------------------+\n",
            "| Processes:                                                                  |\n",
            "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
            "|        ID   ID                                                   Usage      |\n",
            "|=============================================================================|\n",
            "|  No running processes found                                                 |\n",
            "+-----------------------------------------------------------------------------+\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Restart colab session after installation\n",
        "Reload the session if something doesn't work"
      ],
      "metadata": {
        "id": "whlsBiJgR8le"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "%%capture\n",
        "!pip install git+https://github.com/Lednik7/CLIP-ONNX.git\n",
        "!pip install ruclip==0.0.1rc7\n",
        "!pip install onnxruntime-gpu"
      ],
      "metadata": {
        "id": "HnbpAkvuR73L"
      },
      "execution_count": 2,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "%%capture\n",
        "!wget -c -O CLIP.png https://github.com/openai/CLIP/blob/main/CLIP.png?raw=true"
      ],
      "metadata": {
        "id": "tqy0zKM4R-7M"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import onnxruntime\n",
        "\n",
        "# priority device (if available)\n",
        "print(onnxruntime.get_device())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "x8IN72OnSAIh",
        "outputId": "3174cf2c-ace3-4e1f-a550-e16c72302d51"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "GPU\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## RuCLIP\n",
        "WARNING: specific RuCLIP like forward \"model(text, image)\" instead of classic(OpenAI CLIP) \"model(image, text)\""
      ],
      "metadata": {
        "id": "8_wSsSheT5mw"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import warnings\n",
        "\n",
        "warnings.filterwarnings(\"ignore\", category=UserWarning)"
      ],
      "metadata": {
        "id": "gZTxanR26knr"
      },
      "execution_count": 1,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import ruclip\n",
        "\n",
        "# onnx cannot export with cuda\n",
        "model, processor = ruclip.load(\"ruclip-vit-base-patch32-384\", device=\"cpu\")"
      ],
      "metadata": {
        "id": "FdTLuqsJUBFY"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from PIL import Image\n",
        "import numpy as np\n",
        "\n",
        "# simple input\n",
        "pil_images = [Image.open(\"CLIP.png\")]\n",
        "labels = ['диаграмма', 'собака', 'кошка']\n",
        "dummy_input = processor(text=labels, images=pil_images,\n",
        "                        return_tensors='pt', padding=True)\n",
        "\n",
        "# batch first\n",
        "image = dummy_input[\"pixel_values\"] # torch tensor [1, 3, 384, 384]\n",
        "image_onnx = dummy_input[\"pixel_values\"].cpu().detach().numpy().astype(np.float32)\n",
        "\n",
        "# batch first\n",
        "text = dummy_input[\"input_ids\"] # torch tensor [3, 77]\n",
        "text_onnx = dummy_input[\"input_ids\"].cpu().detach().numpy()[::-1].astype(np.int64)"
      ],
      "metadata": {
        "id": "rPwc6A2SSGyl"
      },
      "execution_count": 3,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#RuCLIP output\n",
        "logits_per_image, logits_per_text = model(text, image)\n",
        "probs = logits_per_image.softmax(dim=-1).detach().cpu().numpy()\n",
        "\n",
        "print(\"Label probs:\", probs)  # prints: [[0.9885839  0.00894288 0.0024732 ]]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pv0mH626SdzO",
        "outputId": "d563462f-b2a9-4d49-b491-17e88ffa81f0"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Label probs: [[0.9885839  0.00894288 0.0024732 ]]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## Convert RuCLIP model to ONNX"
      ],
      "metadata": {
        "id": "R_e5OjJeXRiF"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from clip_onnx import clip_onnx\n",
        "\n",
        "visual_path = \"clip_visual.onnx\"\n",
        "textual_path = \"clip_textual.onnx\"\n",
        "\n",
        "onnx_model = clip_onnx(model, visual_path=visual_path, textual_path=textual_path)\n",
        "onnx_model.convert2onnx(image, text, verbose=True)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "oYM5FDSGSJBW",
        "outputId": "c647dc2e-946d-4769-c66e-77edfa98237f"
      },
      "execution_count": 5,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[CLIP ONNX] Start convert visual model\n",
            "[CLIP ONNX] Start check visual model\n",
            "[CLIP ONNX] Start convert textual model\n",
            "[CLIP ONNX] Start check textual model\n",
            "[CLIP ONNX] Models converts successfully\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## [ONNX] CPU inference mode"
      ],
      "metadata": {
        "id": "U1Pr-YTtSEhs"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'CPUExecutionProvider']\n",
        "onnx_model.start_sessions(providers=[\"CPUExecutionProvider\"]) # cpu mode"
      ],
      "metadata": {
        "id": "aY9wRe5kT3wG"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "image_features = onnx_model.encode_image(image_onnx)\n",
        "text_features = onnx_model.encode_text(text_onnx)\n",
        "\n",
        "logits_per_image, logits_per_text = onnx_model(image_onnx, text_onnx)\n",
        "probs = logits_per_image.softmax(dim=-1).detach().cpu().numpy()\n",
        "\n",
        "print(\"Label probs:\", probs)  # prints: Label probs: [[0.90831375 0.07174418 0.01994203]]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "tYVuk72nSLw6",
        "outputId": "75bf3803-6ed7-4516-ccd0-42f9cf7f22e0"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Label probs: [[0.90831375 0.07174418 0.01994203]]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "%timeit onnx_model.encode_text(text_onnx) # text representation"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Bpu4_HFRVeNk",
        "outputId": "e8f1681b-40dc-495f-d382-f0348d87c412"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1 loop, best of 5: 285 ms per loop\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "%timeit onnx_model.encode_image(image_onnx) # image representation"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JsOccP2gVmpo",
        "outputId": "adb33860-b000-461b-959f-95126e2ac049"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1 loop, best of 5: 412 ms per loop\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "## [ONNX] GPU inference mode"
      ],
      "metadata": {
        "id": "Zww0E-jIULug"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "onnx_model.start_sessions(providers=[\"CUDAExecutionProvider\"]) # cuda mode"
      ],
      "metadata": {
        "id": "PBakYeiQUOAm"
      },
      "execution_count": 10,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "%timeit onnx_model.encode_text(text_onnx) # text representation"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "EjvRBvCaWJBL",
        "outputId": "07426652-1cc5-4713-c355-fb4f1bd138d4"
      },
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "The slowest run took 5.07 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
            "100 loops, best of 5: 6.89 ms per loop\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "%timeit onnx_model.encode_image(image_onnx) # image representation"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pmu4mQCsWJ8w",
        "outputId": "5cb45026-dfd3-419d-e5d3-f5d0d9681cd0"
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "The slowest run took 699.84 times longer than the fastest. This could mean that an intermediate result is being cached.\n",
            "1 loop, best of 5: 18.9 ms per loop\n"
          ]
        }
      ]
    }
  ]
}